suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(stars)
library(terra)
})EEA AQ Stations
Stations
Create a table with rows for each station and pollutant recorded. Stations have a type (background, industrial, traffic), an area label (rural, urban, suburban), and coordinates in lon/lat.
Station meta data file comes from EEA.
station_meta = read.table("AQ_stations/PanEuropean_metadata.csv", sep = "\t", header = T) |>
dplyr::select(AirQualityStationEoICode, Countrycode, Longitude, Latitude,
StationType = AirQualityStationType,
StationArea = AirQualityStationArea,
AirPollutant = AirPollutantCode) |>
dplyr::mutate(AirPollutant = factor(basename(AirPollutant),
levels = c(6001, 5, 8, 7),
labels = c("PM2.5", "PM10", "NO2", "O3"))) |>
dplyr::filter(!is.na(AirPollutant)) |>
unique() |>
dplyr::mutate(AirQualityStationEoICode = as.factor(AirQualityStationEoICode),
Countrycode = as.factor(Countrycode),
StationType = as.factor(StationType),
StationArea = as.factor(StationArea)) |>
dplyr::group_by(AirQualityStationEoICode, Countrycode,
StationType, StationArea, AirPollutant) |>
dplyr::summarise(Longitude = mean(Longitude), Latitude = mean(Latitude), .groups = 'drop') |>
dplyr::group_by(AirQualityStationEoICode, StationArea, AirPollutant) |>
# if two types registered for 1 station, label them as non-background (n=45).
dplyr::filter(!(dplyr::n() > 1 & !StationType == "background")) |>
dplyr::ungroup()
station_laea =
select(station_meta, AirQualityStationEoICode, Longitude, Latitude) |>
unique() |>
st_as_sf(coords = c("Longitude","Latitude"), crs = 4326) |>
st_transform(st_crs(3035))Supplementary Data
Elevation
elev = "supplementary/EEA_stations_elevation.parquet"
if (!file.exists(elev)){
dem = stars::read_stars("/vsicurl/https://s3.eu-central-1.wasabisys.com/eumap/dtm/dtm_elev.lowestmode_gedi.eml_mf_30m_0..0cm_2000..2018_eumap_epsg3035_v0.3.tif")
Sys.setenv(GDAL_HTTP_MULTIRANGE = 'YES',
GDAL_DISABLE_READDIR_ON_OPEN="YES")
tictoc::tic()
station_laea$Elevation = stars::st_extract(dem, station_laea) |> pull(1)
arrow::write_parquet(st_drop_geometry(station_laea), elev)
tictoc::toc()
} else {
station_elevation = arrow::read_parquet(elev) |>
mutate(Elevation = ifelse(Elevation < -100, NA, Elevation))
}
(station_laea = left_join(station_laea, station_elevation))Joining with `by = join_by(AirQualityStationEoICode)`
Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1608 of `x` matches multiple rows in `y`.
ℹ Row 1608 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
Simple feature collection with 6164 features and 2 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -2677029 ymin: -3074758 xmax: 10002190 ymax: 6182022
Projected CRS: ETRS89-extended / LAEA Europe
# A tibble: 6,164 × 3
AirQualityStationEoICode geometry Elevation
<fct> <POINT [m]> <dbl>
1 4101422 (5959964 2181947) NA
2 4101522 (5959045 2184719) NA
3 AD0942A (3625031 2195164) 10444
4 AD0944A (3627252 2195722) 16231
5 AD0945A (3639878 2196313) 24894
6 AL0203A (5233926 2012682) 8455
7 AL0204A (5127863 1973522) 2143
8 AL0205A (5113104 2073920) 46
9 AL0206A (5106425 2184055) 7169
10 AL0207A (5168620 2057815) 1272
# ℹ 6,154 more rows
Corine Land Cover
clc_8class = "supplementary/CLC_reclass_8.tif"
if (!file.exists(clc_8class)){
lc = rast("supplementary/lcv_landcover.clc_corine_c_100m_0..0cm_2018_eumap_epsg3035_v2020.tif")
NAflag(lc) = 128
summary(lc)
rcl = cbind(c(1:48),
c(1,2,3, rep(4,3), rep(3,3), rep(5,2), rep(6, 11), rep(7,12), rep(8,14)))
lc8 = classify(lc, rcl, filename = clc_8class,
datatype = "INT1U", NAflag = 0, overwrite=T,
gdal=c("COMPRESS=DEFLATE"))
} else {
lc8 = rast(clc_8class)
}
NAflag(lc8) = 128
levels(lc8) = data.frame(id = c(1:8),
CLC8 = c("HDR", "LDR", "IND","TRAF","UGR","AGR","NAT","OTH"))
plot(lc8)
station_laea$CLC8 = extract(lc8, vect(station_laea)) |> pull(2)Population Density
pop = rast("supplementary/JRC_1K_POP_2018.tif")
plot(pop, breaks = c(0,5,10,50,100,500,1000))
station_laea$Population = extract(pop, vect(station_laea)) |> pull(2) |> replace_na(0)Result
station_meta = left_join(station_meta, station_laea)Joining with `by = join_by(AirQualityStationEoICode)`
Warning in left_join(station_meta, station_laea): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 3767 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
summary(station_meta) AirQualityStationEoICode Countrycode StationType
IT1170A: 24 IT :2213 background:9952
IT2129A: 16 ES :2054 industrial:2025
TR0066A: 16 DE :1966 traffic :3904
SE0093A: 12 FR :1885
IT1090A: 9 TR :1253
E135019: 8 PL :1024
(Other):15796 (Other):5486
StationArea AirPollutant Longitude Latitude
rural :1817 PM2.5:2862 Min. :-63.081 Min. :-21.34
rural-nearcity: 253 PM10 :4725 1st Qu.: 3.363 1st Qu.: 41.72
rural-regional: 451 NO2 :5123 Median : 10.791 Median : 46.65
rural-remote : 124 O3 :3171 Mean : 10.811 Mean : 46.34
suburban :3602 3rd Qu.: 17.961 3rd Qu.: 50.92
urban :9634 Max. : 55.628 Max. : 78.91
geometry Elevation CLC8 Population
POINT :15881 Min. : -52 LDR :7478 Min. : 0
epsg:3035 : 0 1st Qu.: 328 HDR :3002 1st Qu.: 172
+proj=laea...: 0 Median : 1152 AGR :2032 Median : 2448
Mean : 2036 IND :1357 Mean : 4132
3rd Qu.: 2737 NAT : 941 3rd Qu.: 5910
Max. :30185 (Other): 883 Max. :51127
NA's :1420 NA's : 188
table(station_meta$AirPollutant, station_meta$StationType)
background industrial traffic
PM2.5 1851 323 688
PM10 2830 672 1223
NO2 2740 661 1722
O3 2531 369 271
table(station_meta$AirPollutant, station_meta$StationArea)
rural rural-nearcity rural-regional rural-remote suburban urban
PM2.5 274 46 83 21 617 1821
PM10 477 77 104 25 1086 2956
NO2 551 71 113 30 1095 3263
O3 515 59 151 48 804 1594
table(station_meta$StationArea, station_meta$StationType)
background industrial traffic
rural 1434 348 35
rural-nearcity 201 38 14
rural-regional 440 7 4
rural-remote 124 0 0
suburban 2307 866 429
urban 5446 766 3422
station_meta_sf = sf::st_as_sf(station_meta, coords = c("Longitude","Latitude"), crs = 4326)
# filter(station_meta_sf,
# StationType == "background",
# AirPollutant == "NO2") |>
# mapview::mapview(zcol="StationArea")
# filter(station_meta_sf, is.na(CLC8)) |>
# mapview::mapview(zcol="StationArea")
#
# filter(station_meta_sf, is.na(Elevation)) |>
# mapview::mapview(zcol="Countrycode")
station_final = filter(station_meta_sf,
!is.na(CLC8),
!is.na(Elevation)) |>
select(-AirPollutant) |>
group_by(AirQualityStationEoICode, Countrycode) |>
filter(row_number()==1)
mapview::mapview(station_final, zcol="CLC8")The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
which was just loaded, were retired in October 2023.
Please refer to R-spatial evolution reports for details, especially
https://r-spatial.org/r/2023/05/15/evolution4.html.
It may be desirable to make the sf package available;
package maintainers should consider adding sf to Suggests:.
summary(station_final) AirQualityStationEoICode Countrycode StationType
AD0942A: 1 DE : 869 background:3229
AD0944A: 1 IT : 747 industrial: 723
AD0945A: 1 ES : 715 traffic :1740
AL0203A: 1 FR : 706
AL0204A: 1 PL : 461
AL0205A: 1 SE : 237
(Other):5686 (Other):1957
StationArea geometry Elevation CLC8
rural : 631 POINT :5692 Min. : -52 LDR :2787
rural-nearcity: 110 epsg:4326 : 0 1st Qu.: 339 HDR :1060
rural-regional: 179 +proj=long...: 0 Median : 1121 AGR : 719
rural-remote : 47 Mean : 1995 IND : 475
suburban :1333 3rd Qu.: 2676 NAT : 351
urban :3392 Max. :30185 UGR : 166
(Other): 134
Population
Min. : 0
1st Qu.: 624
Median : 2918
Mean : 4503
3rd Qu.: 6363
Max. :51127
Write files
bind_cols(st_drop_geometry(station_final),
data.frame(st_coordinates(station_final)) |>
setNames(c("Longitude","Latitude"))) |>
arrow::write_parquet("AQ_stations/EEA_stations_meta.parquet")
geoarrow::write_geoparquet(station_final, "AQ_stations/EEA_stations_meta_sf.parquet")